The data

The data consists of all trees (h >= 2 m) present in a 1 ha plot. Trees were extracted from a 2018 scan (n = 1018) and a 2019 scan (n = 920) following a major fire event.
With the exception of one extreme outlier, the natural variation in tree volume was preserved for the establishment of the model.

Libraries and Functions

library(devtools)
library(plyr)
library(scales)
library(tidyr)
library(nlme)
library(dplyr)
library(ggplot2)
library(AICcmodavg)
library(visreg)
library(ggforce)
library(bbmle)
library(lmtest)
library(knitr)
library(performance)
library(caret)
# function to calculate Root Mean Square Error (RMSE) of model predictions
# Root Mean Square Error (RMSE) of model predictions.
# Obs, Pred: numeric vectors of equal length. Returns a single numeric value.
RMSE <- function(Obs, Pred) {
  squared_errors <- (Obs - Pred)^2
  sqrt(mean(squared_errors))
}

# function to calculate the relative error (%) of each model prediction
# Relative error (%) of each model prediction: positive values indicate
# over-prediction, negative values under-prediction. Vectorized over inputs.
relative_err <- function(Obs, Pred) {
  100 * (Pred - Obs) / Obs
}

Data prep

# remove outlier/ NAs: drop the single extreme-outlier tree (see text above)
# and any trees without a crown-area estimate
df <- df[!(df$ID=="2018SOR_449.txt"),]
df <- df %>% drop_na(CrownArea_rTLS)


# rename variables to the short names used throughout the analysis
df$H <- df$Height_rTLS
df$CA <- df$CrownArea_rTLS
df$V <- df$voxel_Volume_VoxR_04


# create compound variable (height x crown area)
df$H_CA <- df$H * df$CA

# log H_CA and centre at its mean for use with year as interaction term
# (centring makes the year main effect interpretable at the mean tree size)
df$log_H_CA <- log(df$H_CA)
df$H_CA_log_centred <- scale(df$log_H_CA, scale=FALSE)
# scale() returns a 1-column matrix; coerce back to a plain numeric vector
df$H_CA_log_centred <- as.numeric(df$H_CA_log_centred)

# assign trees to log scale bins for data binning/ thinning
# Assign trees to 8 equal-width bins on the log(V) scale, used below for
# data binning/thinning. The break points extend 0.01 beyond the observed
# range so every tree falls strictly inside a bin.
# Simplified vs original: hist(x, breaks = v, plot = F)$breaks just returns
# the supplied break vector, so the seq() is passed to cut() directly.
log_V <- log(df$V)
df$V_class <- cut(log_V,
                  breaks = seq(min(log_V) - 0.01,
                               max(log_V) + 0.01,
                               len = 9))

# convert factor bin labels to integer bin indices 1..8
df$V_class <- as.numeric(df$V_class)

# Data binning: mean of full data bins to fit final model (include year)
# Data binning: mean of full data bins to fit final model (include year).
# Bins are summarised separately per scan year so each year contributes
# up to 8 bin-mean rows.
df18 <- filter(df, year == 2018)
df19 <- filter(df, year == 2019)

df_bin_18 <- ddply(df18, .(V_class),summarise,
                  N_trees =length(V),
                  V  = mean(V),
                  H  = mean(H),
                  CA = mean(CA),
                  H_CA = mean(H_CA),
                  H_CA_log_centred = mean(H_CA_log_centred),
                  year = mean(year))

df_bin_19 <- ddply(df19, .(V_class),summarise,
                  N_trees =length(V),
                  V  = mean(V),
                  H  = mean(H),
                  CA = mean(CA),
                  H_CA = mean(H_CA),
                  H_CA_log_centred = mean(H_CA_log_centred),
                  year = mean(year))

# stack the per-year bin means (16 rows when every bin is occupied)
df_bin_y <- rbind(df_bin_18, df_bin_19)

# Data thinning: equal random sub-sample of full data bins to fit final model
# Data thinning: equal random sub-sample (64 trees, without replacement)
# from each of the 8 log-volume bins, to fit the final model.
# Improved vs original: build the per-bin samples as a list and bind once,
# instead of growing a data frame with rbind() inside the loop (which
# copies the accumulator on every iteration). sample_n() is still called
# once per bin in the same order, so the draws are identical under a
# given RNG seed.
df_thin <- do.call(rbind, lapply(seq_len(8), function(l) {
  sample_n(filter(df, V_class == l), 64, replace = FALSE)
}))

# number of repetitions for the Relative error plots below
reps <- 100

Raw plots

The relationship between tree height H and total tree volume V was heteroscedastic. With a log transformation applied to both variables the relationship is more linear but with a large spread.
The relationship between crown area CA and total tree volume V was also improved by applying a log transformation to both variables but with increased spread in the smaller values.
Creating a compound variable H_CA and applying a log transformation to both variables produced the best linear relationship with total tree volume V with reduced spread in the lower values.

V ~ H

# Raw H vs V: heteroscedastic (spread increases with tree size; see text)
ggplot(df, aes(x = H, y = V)) + geom_point()

log(V) ~ log(H)

# log-log H vs V: more linear, but with a large spread (see text)
ggplot(df, aes(x = log(H), y = log(V))) + geom_point()

V ~ CA

# Raw CA vs V
ggplot(df, aes(x = CA, y = V)) + geom_point()

log(V) ~ log(CA)

# log-log CA vs V: improved linearity, increased spread at small values
ggplot(df, aes(x = log(CA), y = log(V))) + geom_point()

V ~ H_CA

# Raw compound variable H*CA vs V
ggplot(df, aes(x = H_CA, y = V)) + geom_point()

log(V) ~ log(H_CA)

# log-log H*CA vs V: best linear relationship, reduced spread at low values
ggplot(df, aes(x = log(H_CA), y = log(V))) + geom_point()

Relative error plots

Each model is run 100 times; each repetition draws a new random calibration–validation split of the data.
The relative error of each repetition is plotted.

Relative error plots follow the approach of Jucker et al. (2017).

M1 raw data model

M1 H_CA

# Per-rep storage: fitted intercept, log(H_CA) slope, and validation RMSE
Raw_data_model<-data.frame(matrix(nrow=reps, ncol=3)) 
colnames(Raw_data_model)[1:3]<-c("intercept","beta_log_HCA","RMSE") 

# Empty log-x canvas; one relative-error spline is drawn per rep below
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, drawn without
  # replacement (192 trees in total, ~10% of the data)
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }

  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)


  ### Fit model on raw data; exp() back-transforms predictions from the
  ### log scale (no log-bias correction applied)
  M1<-lm(log(V)~log(H_CA),data=Data_model)
  Validation_data$V_pred_M1<-exp(predict(M1,Validation_data))
  Validation_data$Error_M1<-relative_err(Validation_data$V,Validation_data$V_pred_M1)
  lines(smooth.spline(Validation_data$Error_M1 ~ Validation_data$V,nknots=5),col=alpha("chartreuse3",0.25), lwd=0.5)

  # Record coefficients and hold-out RMSE for this rep
  Raw_data_model[i,"intercept"]<-coef(M1)[1]
  Raw_data_model[i,"beta_log_HCA"]<-coef(M1)[2]
  Raw_data_model[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M1)
}

M1_y H_CA + year

# Per-rep storage: intercept, log(H_CA) slope, year effect, validation RMSE
Raw_data_model_y<-data.frame(matrix(nrow=reps, ncol=4)) 
colnames(Raw_data_model_y)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)


  ### Fit model with an additive (intercept-shift) year effect
  M1_y<-lm(log(V)~log(H_CA) + factor(year),data=Data_model)
  Validation_data$V_pred_M1_y<-exp(predict(M1_y,Validation_data))
  Validation_data$Error_M1_y<-relative_err(Validation_data$V,Validation_data$V_pred_M1_y)
  lines(smooth.spline(Validation_data$Error_M1_y ~ Validation_data$V,nknots=5),col=alpha("chartreuse3",0.25),lwd=0.5)
  
  # Record coefficients and hold-out RMSE for this rep
  Raw_data_model_y[i,"intercept"]<-coef(M1_y)[1]
  Raw_data_model_y[i,"beta_log_HCA"]<-coef(M1_y)[2]
  Raw_data_model_y[i,"beta_year"]<-coef(M1_y)[3]
  Raw_data_model_y[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M1_y)
}

M1y H_CA * year

# Per-rep storage: intercept, centred-log(H_CA) slope, year effect, RMSE.
# NOTE(review): the interaction model below has 4 coefficients; the
# interaction term coef(M1y)[4] is not recorded — confirm this is intended.
Raw_data_modely<-data.frame(matrix(nrow=reps, ncol=4)) 
colnames(Raw_data_modely)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)


  ### Fit the size-by-year interaction model; the mean-centred predictor
  ### makes the year main effect interpretable at the mean tree size
  M1y<-lm(log(V)~H_CA_log_centred * factor(year),data=Data_model)
  Validation_data$V_pred_M1y<-exp(predict(M1y,Validation_data))
  Validation_data$Error_M1y<-relative_err(Validation_data$V,Validation_data$V_pred_M1y)
  lines(smooth.spline(Validation_data$Error_M1y ~ Validation_data$V,nknots=5),col=alpha("chartreuse3",0.25),lwd=0.5)
  
  # Record coefficients and hold-out RMSE for this rep
  Raw_data_modely[i,"intercept"]<-coef(M1y)[1]
  Raw_data_modely[i,"beta_log_HCA"]<-coef(M1y)[2]
  Raw_data_modely[i,"beta_year"]<-coef(M1y)[3]
  Raw_data_modely[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M1y)
}

M1a H + CA

# Per-rep storage for the additive model log(V) ~ log(H) + log(CA).
# Fix vs original: this model has TWO slopes, but only coef[2] was stored,
# under the misleading name "beta_log_HCA". Both slopes are now recorded
# under their correct names. Downstream code reads only the RMSE column,
# which is unchanged.
Raw_data_modela<-data.frame(matrix(nrow=reps, ncol=4))
colnames(Raw_data_modela)[1:4]<-c("intercept","beta_log_H","beta_log_CA","RMSE")

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){

  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)


  ### Fit model with H and CA as separate additive predictors
  M1a <- lm(log(V)~log(H)+log(CA),data=Data_model)
  Validation_data$V_pred_M1a<-exp(predict(M1a,Validation_data))
  Validation_data$Error_M1a<-relative_err(Validation_data$V,Validation_data$V_pred_M1a)
  lines(smooth.spline(Validation_data$Error_M1a ~ Validation_data$V,nknots=5),col=alpha("chartreuse3",0.25),lwd=0.5)
  
  # Record both slopes and hold-out RMSE for this rep
  Raw_data_modela[i,"intercept"]<-coef(M1a)[1]
  Raw_data_modela[i,"beta_log_H"]<-coef(M1a)[2]
  Raw_data_modela[i,"beta_log_CA"]<-coef(M1a)[3]
  Raw_data_modela[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M1a)
}

M1b H * CA

# Per-rep storage for the interaction model log(V) ~ log(H) * log(CA).
# Fix vs original: this model has FOUR coefficients, but only coef[2] was
# stored, under the misleading name "beta_log_HCA". All coefficients are
# now recorded under their correct names. Downstream code reads only the
# RMSE column, which is unchanged.
Raw_data_modelb<-data.frame(matrix(nrow=reps, ncol=5))
colnames(Raw_data_modelb)[1:5]<-c("intercept","beta_log_H","beta_log_CA","beta_interaction","RMSE")

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){

  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)


  ### Fit model with H, CA, and their interaction
  M1b <- lm(log(V)~log(H)*log(CA),data=Data_model)
  Validation_data$V_pred_M1b<-exp(predict(M1b,Validation_data))
  Validation_data$Error_M1b<-relative_err(Validation_data$V,Validation_data$V_pred_M1b)
  lines(smooth.spline(Validation_data$Error_M1b ~ Validation_data$V,nknots=5),col=alpha("chartreuse3",0.25),lwd=0.5)
  
  # Record all coefficients and hold-out RMSE for this rep
  Raw_data_modelb[i,"intercept"]<-coef(M1b)[1]
  Raw_data_modelb[i,"beta_log_H"]<-coef(M1b)[2]
  Raw_data_modelb[i,"beta_log_CA"]<-coef(M1b)[3]
  Raw_data_modelb[i,"beta_interaction"]<-coef(M1b)[4]
  Raw_data_modelb[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M1b)
}

M2 binned data model

M2 H_CA

# Per-rep storage: intercept, log(H_CA) slope, and validation RMSE
Binned_data_model<-data.frame(matrix(nrow=reps, ncol=3))  
colnames(Binned_data_model)[1:3]<-c("intercept","beta_log_HCA","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j)
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }

  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)

  # Data binning: summarise the training trees to per-year bin means,
  # so the model is fitted to (up to) 16 bin-mean rows rather than ~1700 trees
  Data_model18 <- filter(Data_model, year == 2018)
  Data_model19 <- filter(Data_model, year == 2019)

  Data_bin_18 <- ddply(Data_model18, .(V_class),summarise,
                    N_trees =length(V),
                    V  = mean(V),
                    H  = mean(H),
                    CA = mean(CA),
                    H_CA = mean(H_CA),
                    year = mean(year))

  Data_bin_19 <- ddply(Data_model19, .(V_class),summarise,
                    N_trees =length(V),
                    V  = mean(V),
                    H  = mean(H),
                    CA = mean(CA),
                    H_CA = mean(H_CA),
                    year = mean(year))

  Data_bin_y <- rbind(Data_bin_18, Data_bin_19)
  
  # Train model on bin means; validate on the held-out individual trees
  M2<-lm(log(V)~log(H_CA),data=Data_bin_y)
  Validation_data$V_pred_M2<-exp(predict(M2,Validation_data))
  Validation_data$Error_M2<-relative_err(Validation_data$V,Validation_data$V_pred_M2)
  lines(smooth.spline(Validation_data$Error_M2 ~ Validation_data$V,nknots=5),col=alpha("cornflowerblue",0.25),lwd=0.5)

  # Record coefficients and hold-out RMSE for this rep
  Binned_data_model[i,"intercept"]<-coef(M2)[1]
  Binned_data_model[i,"beta_log_HCA"]<-coef(M2)[2]
  Binned_data_model[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M2)
}

M2_y H_CA + year

# Per-rep storage: intercept, log(H_CA) slope, year effect, validation RMSE
Binned_data_model_y<-data.frame(matrix(nrow=reps, ncol=4))  
colnames(Binned_data_model_y)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)

  # Data binning: per-year bin means of the training trees
  Data_model18 <- filter(Data_model, year == 2018)
  Data_model19 <- filter(Data_model, year == 2019)

  Data_bin_18 <- ddply(Data_model18, .(V_class),summarise,
                    N_trees =length(V),
                    V  = mean(V),
                    H  = mean(H),
                    CA = mean(CA),
                    H_CA = mean(H_CA),
                    year = mean(year))

  Data_bin_19 <- ddply(Data_model19, .(V_class),summarise,
                    N_trees =length(V),
                    V  = mean(V),
                    H  = mean(H),
                    CA = mean(CA),
                    H_CA = mean(H_CA),
                    year = mean(year))

  Data_bin_y <- rbind(Data_bin_18, Data_bin_19)
  
  # Train additive-year model on bin means; validate on held-out trees
  M2_y<-lm(log(V)~log(H_CA) + factor(year),data=Data_bin_y)
  Validation_data$V_pred_M2_y<-exp(predict(M2_y,Validation_data))
  Validation_data$Error_M2_y<-relative_err(Validation_data$V,Validation_data$V_pred_M2_y)
  lines(smooth.spline(Validation_data$Error_M2_y ~ Validation_data$V,nknots=5),col=alpha("cornflowerblue",0.25),lwd=0.5)

  # Record coefficients and hold-out RMSE for this rep
  Binned_data_model_y[i,"intercept"]<-coef(M2_y)[1]
  Binned_data_model_y[i,"beta_log_HCA"]<-coef(M2_y)[2]
  Binned_data_model_y[i,"beta_year"]<-coef(M2_y)[3]
  Binned_data_model_y[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M2_y)
}

M2y H_CA * year

# Per-rep storage: intercept, centred-log(H_CA) slope, year effect, RMSE.
# NOTE(review): the interaction model below has 4 coefficients; coef(M2y)[4]
# (the interaction term) is not recorded — confirm this is intended.
Binned_data_modely<-data.frame(matrix(nrow=reps, ncol=4))  
colnames(Binned_data_modely)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)

  # Data binning: per-year bin means; here the centred-log predictor is
  # averaged (centring was done once on the full data set, above)
  Data_model18 <- filter(Data_model, year == 2018)
  Data_model19 <- filter(Data_model, year == 2019)

  Data_bin_18 <- ddply(Data_model18, .(V_class),summarise,
                    N_trees =length(V),
                    V  = mean(V),
                    H  = mean(H),
                    CA = mean(CA),
                    H_CA_log_centred = mean(H_CA_log_centred),
                    year = mean(year))

  Data_bin_19 <- ddply(Data_model19, .(V_class),summarise,
                    N_trees =length(V),
                    V  = mean(V),
                    H  = mean(H),
                    CA = mean(CA),
                    H_CA_log_centred = mean(H_CA_log_centred),
                    year = mean(year))

  Data_bin_y <- rbind(Data_bin_18, Data_bin_19)
  
  # Train size-by-year interaction model on bin means; validate on trees
  M2y<-lm(log(V)~H_CA_log_centred * factor(year),data=Data_bin_y)
  Validation_data$V_pred_M2y<-exp(predict(M2y,Validation_data))
  Validation_data$Error_M2y<-relative_err(Validation_data$V,Validation_data$V_pred_M2y)
  lines(smooth.spline(Validation_data$Error_M2y ~ Validation_data$V,nknots=5),col=alpha("cornflowerblue",0.25),lwd=0.5)

  # Record coefficients and hold-out RMSE for this rep
  Binned_data_modely[i,"intercept"]<-coef(M2y)[1]
  Binned_data_modely[i,"beta_log_HCA"]<-coef(M2y)[2]
  Binned_data_modely[i,"beta_year"]<-coef(M2y)[3]
  Binned_data_modely[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M2y)
}

M2a H + CA

# Per-rep storage.
# NOTE(review): the model below has two slopes (log(H) and log(CA)) but
# only coef[2] (the log(H) slope) is stored, under the misleading column
# name "beta_log_HCA" — confirm this is intended.
Binned_data_modela<-data.frame(matrix(nrow=reps, ncol=3))  
colnames(Binned_data_modela)[1:3]<-c("intercept","beta_log_HCA","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)

  # Data binning: per-year bin means of the training trees
  Data_model18 <- filter(Data_model, year == 2018)
  Data_model19 <- filter(Data_model, year == 2019)

  Data_bin_18 <- ddply(Data_model18, .(V_class),summarise,
                    N_trees =length(V),
                    V  = mean(V),
                    H  = mean(H),
                    CA = mean(CA),
                    H_CA = mean(H_CA),
                    year = mean(year))

  Data_bin_19 <- ddply(Data_model19, .(V_class),summarise,
                    N_trees =length(V),
                    V  = mean(V),
                    H  = mean(H),
                    CA = mean(CA),
                    H_CA = mean(H_CA),
                    year = mean(year))

  Data_bin_y <- rbind(Data_bin_18, Data_bin_19)
  
  # Train additive H + CA model on bin means; validate on held-out trees
  M2a <- lm(log(V)~log(H)+log(CA),data=Data_bin_y)
  Validation_data$V_pred_M2a<-exp(predict(M2a,Validation_data))
  Validation_data$Error_M2a<-relative_err(Validation_data$V,Validation_data$V_pred_M2a)
  lines(smooth.spline(Validation_data$Error_M2a ~ Validation_data$V,nknots=5),col=alpha("cornflowerblue",0.25), lwd=0.5)

  # Record coefficients and hold-out RMSE for this rep
  Binned_data_modela[i,"intercept"]<-coef(M2a)[1]
  Binned_data_modela[i,"beta_log_HCA"]<-coef(M2a)[2]
  Binned_data_modela[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M2a)
}

M2b H * CA

# Per-rep storage.
# NOTE(review): the interaction model below has 4 coefficients but only
# coef[2] (the log(H) slope) is stored, under the misleading column name
# "beta_log_HCA" — confirm this is intended.
Binned_data_modelb<-data.frame(matrix(nrow=reps, ncol=3))  
colnames(Binned_data_modelb)[1:3]<-c("intercept","beta_log_HCA","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)

  # Data binning: per-year bin means of the training trees
  Data_model18 <- filter(Data_model, year == 2018)
  Data_model19 <- filter(Data_model, year == 2019)

  Data_bin_18 <- ddply(Data_model18, .(V_class),summarise,
                    N_trees =length(V),
                    V  = mean(V),
                    H  = mean(H),
                    CA = mean(CA),
                    H_CA = mean(H_CA),
                    year = mean(year))

  Data_bin_19 <- ddply(Data_model19, .(V_class),summarise,
                    N_trees =length(V),
                    V  = mean(V),
                    H  = mean(H),
                    CA = mean(CA),
                    H_CA = mean(H_CA),
                    year = mean(year))

  Data_bin_y <- rbind(Data_bin_18, Data_bin_19)
  
  # Train H * CA interaction model on bin means; validate on held-out trees
  M2b <- lm(log(V)~log(H)*log(CA),data=Data_bin_y)
  Validation_data$V_pred_M2b<-exp(predict(M2b,Validation_data))
  Validation_data$Error_M2b<-relative_err(Validation_data$V,Validation_data$V_pred_M2b)
  lines(smooth.spline(Validation_data$Error_M2b ~ Validation_data$V,nknots=5),col=alpha("cornflowerblue",0.25), lwd=0.5)

  # Record coefficients and hold-out RMSE for this rep
  Binned_data_modelb[i,"intercept"]<-coef(M2b)[1]
  Binned_data_modelb[i,"beta_log_HCA"]<-coef(M2b)[2]
  Binned_data_modelb[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M2b)
}

M3 thinned data model

M3 H_CA

# Per-rep storage: intercept, log(H_CA) slope, and validation RMSE
Thinned_data_model<-data.frame(matrix(nrow=reps, ncol=3))  
colnames(Thinned_data_model)[1:3]<-c("intercept","beta_log_HCA","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)

  # Data thinning: equal random sub-sample (40 trees) from each bin of the
  # training set, so every bin carries equal weight in the fit
  Data_thin <- data.frame()
  for (l in 1:8) {
  tmp <- filter(Data_model, V_class == l) 
  sample <- sample_n(tmp, 40, replace = FALSE)
  Data_thin <- rbind(Data_thin,sample)
  }

  ### Fit model on thinned data; validate on held-out trees
  M3<-lm(log(V)~log(H_CA),data=Data_thin)
  Validation_data$V_pred_M3<-exp(predict(M3,Validation_data))
  Validation_data$Error_M3<-relative_err(Validation_data$V,Validation_data$V_pred_M3)
  lines(smooth.spline(Validation_data$Error_M3 ~ Validation_data$V,nknots=5),col=alpha("deeppink1",0.25),lwd=0.5)
  
  # Record coefficients and hold-out RMSE for this rep
  Thinned_data_model[i,"intercept"]<-coef(M3)[1]
  Thinned_data_model[i,"beta_log_HCA"]<-coef(M3)[2]
  Thinned_data_model[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M3)
}

M3_y H_CA + year

# Per-rep storage: intercept, log(H_CA) slope, year effect, validation RMSE
Thinned_data_model_y<-data.frame(matrix(nrow=reps, ncol=4))  
colnames(Thinned_data_model_y)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)

  # Data thinning: equal random sub-sample (40 trees) from each training bin
  Data_thin <- data.frame()
  for (l in 1:8) {
  tmp <- filter(Data_model, V_class == l) 
  sample <- sample_n(tmp, 40, replace = FALSE)
  Data_thin <- rbind(Data_thin,sample)
  }
  
  ### Fit additive-year model on thinned data; validate on held-out trees
  M3_y<-lm(log(V)~log(H_CA) + factor(year),data=Data_thin)
  Validation_data$V_pred_M3_y<-exp(predict(M3_y,Validation_data))
  Validation_data$Error_M3_y<-relative_err(Validation_data$V,Validation_data$V_pred_M3_y)
  lines(smooth.spline(Validation_data$Error_M3_y ~ Validation_data$V,nknots=5),col=alpha("deeppink1",0.25),lwd=0.5)
  
  # Record coefficients and hold-out RMSE for this rep
  Thinned_data_model_y[i,"intercept"]<-coef(M3_y)[1]
  Thinned_data_model_y[i,"beta_log_HCA"]<-coef(M3_y)[2]
  Thinned_data_model_y[i,"beta_year"]<-coef(M3_y)[3]
  Thinned_data_model_y[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M3_y)
}

M3y H_CA * year

# Per-rep storage: intercept, centred-log(H_CA) slope, year effect, RMSE.
# NOTE(review): the interaction model below has 4 coefficients; coef(M3y)[4]
# (the interaction term) is not recorded — confirm this is intended.
Thinned_data_modely<-data.frame(matrix(nrow=reps, ncol=4))  
colnames(Thinned_data_modely)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)

  # Data thinning: equal random sub-sample (40 trees) from each training bin
  Data_thin <- data.frame()
  for (l in 1:8) {
  tmp <- filter(Data_model, V_class == l) 
  sample <- sample_n(tmp, 40, replace = FALSE)
  Data_thin <- rbind(Data_thin,sample)
  }
  
  ### Fit size-by-year interaction model on thinned data
  M3y<-lm(log(V)~H_CA_log_centred * factor(year),data=Data_thin)
  Validation_data$V_pred_M3y<-exp(predict(M3y,Validation_data))
  Validation_data$Error_M3y<-relative_err(Validation_data$V,Validation_data$V_pred_M3y)
  lines(smooth.spline(Validation_data$Error_M3y ~ Validation_data$V,nknots=5),col=alpha("deeppink1",0.25),lwd=0.5)
  
  # Record coefficients and hold-out RMSE for this rep
  Thinned_data_modely[i,"intercept"]<-coef(M3y)[1]
  Thinned_data_modely[i,"beta_log_HCA"]<-coef(M3y)[2]
  Thinned_data_modely[i,"beta_year"]<-coef(M3y)[3]
  Thinned_data_modely[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M3y)
}

M3a H + CA

# Per-rep storage.
# NOTE(review): the model below has two slopes (log(H) and log(CA)) but
# only coef[2] (the log(H) slope) is stored, under the misleading column
# name "beta_log_HCA" — confirm this is intended.
Thinned_data_modela<-data.frame(matrix(nrow=reps, ncol=3))  
colnames(Thinned_data_modela)[1:3]<-c("intercept","beta_log_HCA","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)

  # Data thinning: equal random sub-sample (40 trees) from each training bin
  Data_thin <- data.frame()
  for (l in 1:8) {
  tmp <- filter(Data_model, V_class == l) 
  sample <- sample_n(tmp, 40, replace = FALSE)
  Data_thin <- rbind(Data_thin,sample)
  }
  
  ### Fit additive H + CA model on thinned data
  M3a <- lm(log(V)~log(H)+log(CA),data=Data_thin)
  Validation_data$V_pred_M3a<-exp(predict(M3a,Validation_data))
  Validation_data$Error_M3a<-relative_err(Validation_data$V,Validation_data$V_pred_M3a)
  lines(smooth.spline(Validation_data$Error_M3a ~ Validation_data$V,nknots=5),col=alpha("deeppink1",0.25),lwd=0.5)
  
  # Record coefficients and hold-out RMSE for this rep
  Thinned_data_modela[i,"intercept"]<-coef(M3a)[1]
  Thinned_data_modela[i,"beta_log_HCA"]<-coef(M3a)[2]
  Thinned_data_modela[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M3a)
}

M3b H * CA

# Per-rep storage.
# NOTE(review): the interaction model below has 4 coefficients but only
# coef[2] (the log(H) slope) is stored, under the misleading column name
# "beta_log_HCA" — confirm this is intended.
Thinned_data_modelb<-data.frame(matrix(nrow=reps, ncol=3))  
colnames(Thinned_data_modelb)[1:3]<-c("intercept","beta_log_HCA","RMSE") 

# Empty log-x canvas; one relative-error spline per rep
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
     xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)

for (i in 1:reps){
  
  # Set up validation data: 24 trees per log-volume bin, without replacement
  Validation_data <- data.frame()
  for (j in 1:8) {
    tmp <- filter(df, V_class == j) 
    sample <- sample_n(tmp, 24, replace = FALSE)
    Validation_data <- rbind(Validation_data,sample)
  }
  
  # use rest to train model  
  Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
  Data_model<-merge(df,Model_trees)                                       # data for model fitting (~90% of trees)

  # Data thinning: equal random sub-sample (40 trees) from each training bin
  Data_thin <- data.frame()
  for (l in 1:8) {
  tmp <- filter(Data_model, V_class == l) 
  sample <- sample_n(tmp, 40, replace = FALSE)
  Data_thin <- rbind(Data_thin,sample)
  }
  
  ### Fit H * CA interaction model on thinned data
  M3b <- lm(log(V)~log(H)*log(CA),data=Data_thin)
  Validation_data$V_pred_M3b<-exp(predict(M3b,Validation_data))
  Validation_data$Error_M3b<-relative_err(Validation_data$V,Validation_data$V_pred_M3b)
  lines(smooth.spline(Validation_data$Error_M3b ~ Validation_data$V,nknots=5),col=alpha("deeppink1",0.25),lwd=0.5)
  
  # Record coefficients and hold-out RMSE for this rep
  Thinned_data_modelb[i,"intercept"]<-coef(M3b)[1]
  Thinned_data_modelb[i,"beta_log_HCA"]<-coef(M3b)[2]
  Thinned_data_modelb[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M3b)
}

Mean RMSE

Mean RMSE (m3) for the 100 reps.

# Range of observed volumes (context for the error-plot x-axes)
min(df$V)
## [1] 0.00256
max(df$V)
## [1] 23.05142
# Mean validation RMSE over the reps: raw-data models
RM1 <- mean(Raw_data_model$RMSE)
RM1_y <- mean(Raw_data_model_y$RMSE)
RM1y <- mean(Raw_data_modely$RMSE)
RM1a <- mean(Raw_data_modela$RMSE)
RM1b <- mean(Raw_data_modelb$RMSE)
RM1all <- c(RM1, RM1_y, RM1y, RM1a, RM1b)

# Mean validation RMSE over the reps: binned-data models
RM2 <- mean(Binned_data_model$RMSE)
RM2_y <- mean(Binned_data_model_y$RMSE)
RM2y <- mean(Binned_data_modely$RMSE)
RM2a <- mean(Binned_data_modela$RMSE)
RM2b <- mean(Binned_data_modelb$RMSE)
RM2all <- c(RM2, RM2_y, RM2y, RM2a, RM2b)

# Mean validation RMSE over the reps: thinned-data models
RM3 <- mean(Thinned_data_model$RMSE)
RM3_y <- mean(Thinned_data_model_y$RMSE)
RM3y <- mean(Thinned_data_modely$RMSE)
RM3a <- mean(Thinned_data_modela$RMSE)
RM3b <- mean(Thinned_data_modelb$RMSE)
RM3all <- c(RM3, RM3_y, RM3y, RM3a, RM3b)

# Assemble the mean-RMSE comparison table (one row per model form, one
# column per data treatment).
# Fixes vs original: the header vector was named `data`, which masks
# base::data(); and the row labels wrote "M" where the predictor is H
# ("M_CA" for the H_CA model, "M + CA" for H + CA, "M * CA" for H * CA).
table_header <- c("model", "raw data", "binned data", "thinned data")
model <- c("H_CA", "H_CA + year", "H_CA * year", "H + CA", "H * CA")

dfR <- data.frame(model, RM1all, RM2all, RM3all)
colnames(dfR) <- table_header
kable(dfR)
model raw data binned data thinned data
M_CA 1.973366 1.645145 1.632187
M_CA + year 1.788147 1.545326 1.501676
M_CA * year 2.010889 1.917560 1.635667
M + CA 1.963079 1.646094 1.643541
M * CA 1.771526 1.756127 1.647579

Model summaries

H_CA

M1 raw

# M1: final H_CA model fitted to the full raw data set
M1 <- lm(log(V) ~ log(H_CA), data = df)
summary(M1)
## 
## Call:
## lm(formula = log(V) ~ log(H_CA), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.22628 -0.47538  0.02432  0.46293  2.17167 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.937141   0.018406  -159.6   <2e-16 ***
## log(H_CA)    0.720053   0.005992   120.2   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.627 on 1903 degrees of freedom
## Multiple R-squared:  0.8836, Adjusted R-squared:  0.8835 
## F-statistic: 1.444e+04 on 1 and 1903 DF,  p-value: < 2.2e-16

M2 binned

# Same H_CA model fitted to the binned data (bin means, df_bin_y)
M2 <- lm(log(V) ~ log(H_CA), data = df_bin_y)
summary(M2)
## 
## Call:
## lm(formula = log(V) ~ log(H_CA), data = df_bin_y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37410 -0.28781  0.04137  0.20109  0.48828 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.30143    0.09159  -36.05 3.29e-15 ***
## log(H_CA)    0.80113    0.02304   34.77 5.43e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2878 on 14 degrees of freedom
## Multiple R-squared:  0.9885, Adjusted R-squared:  0.9877 
## F-statistic:  1209 on 1 and 14 DF,  p-value: 5.434e-15

M3 thinned

# Same H_CA model fitted to the thinned data (df_thin)
M3 <- lm(log(V) ~ log(H_CA), data = df_thin)
summary(M3)
## 
## Call:
## lm(formula = log(V) ~ log(H_CA), data = df_thin)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.28449 -0.43784  0.06039  0.42141  2.08901 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.916855   0.032497  -89.76   <2e-16 ***
## log(H_CA)    0.744127   0.008336   89.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6259 on 510 degrees of freedom
## Multiple R-squared:  0.9398, Adjusted R-squared:  0.9397 
## F-statistic:  7968 on 1 and 510 DF,  p-value: < 2.2e-16

H_CA + year

M1_y raw

# H_CA model with an additive (intercept-shift) year effect, raw data
M1_y<-lm(log(V)~log(H_CA) + factor(year),data=df)
summary(M1_y)
## 
## Call:
## lm(formula = log(V) ~ log(H_CA) + factor(year), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.44224 -0.38583  0.01784  0.36707  2.40993 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -2.728729   0.021023 -129.80   <2e-16 ***
## log(H_CA)         0.724841   0.005586  129.76   <2e-16 ***
## factor(year)2019 -0.459075   0.026820  -17.12   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5838 on 1902 degrees of freedom
## Multiple R-squared:  0.8991, Adjusted R-squared:  0.899 
## F-statistic:  8475 on 2 and 1902 DF,  p-value: < 2.2e-16

M2_y binned

# H_CA + year model on the binned data
M2_y<-lm(log(V)~log(H_CA) + factor(year),data=df_bin_y)
summary(M2_y)
## 
## Call:
## lm(formula = log(V) ~ log(H_CA) + factor(year), data = df_bin_y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43418 -0.15925 -0.01401  0.16980  0.40453 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -3.14420    0.09339 -33.667 4.94e-14 ***
## log(H_CA)         0.80474    0.01889  42.611 2.37e-15 ***
## factor(year)2019 -0.33226    0.11791  -2.818   0.0145 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2353 on 13 degrees of freedom
## Multiple R-squared:  0.9929, Adjusted R-squared:  0.9918 
## F-statistic: 907.9 on 2 and 13 DF,  p-value: 1.088e-14

M3_y thinned

# H_CA + year model on the thinned data
M3_y<-lm(log(V)~log(H_CA) + factor(year),data=df_thin)
summary(M3_y)
## 
## Call:
## lm(formula = log(V) ~ log(H_CA) + factor(year), data = df_thin)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.44122 -0.35008  0.02093  0.36647  2.34024 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -2.769211   0.037072 -74.697  < 2e-16 ***
## log(H_CA)         0.749891   0.007984  93.920  < 2e-16 ***
## factor(year)2019 -0.390583   0.053903  -7.246  1.6e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5965 on 509 degrees of freedom
## Multiple R-squared:  0.9455, Adjusted R-squared:  0.9453 
## F-statistic:  4413 on 2 and 509 DF,  p-value: < 2.2e-16

H_CA * year

M1y raw

# Year-interaction model; uses the mean-centred log(H_CA) predictor so the
# year main effect is interpretable at the mean of log(H_CA). Raw data.
M1y<-lm(log(V)~H_CA_log_centred * factor(year),data=df)
summary(M1y)
## 
## Call:
## lm(formula = log(V) ~ H_CA_log_centred * factor(year), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.43451 -0.37455 -0.00658  0.35573  2.43725 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -1.332759   0.018202 -73.221  < 2e-16 ***
## H_CA_log_centred                   0.758280   0.007108 106.684  < 2e-16 ***
## factor(year)2019                  -0.456530   0.026447 -17.262  < 2e-16 ***
## H_CA_log_centred:factor(year)2019 -0.083694   0.011245  -7.443 1.48e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5756 on 1901 degrees of freedom
## Multiple R-squared:  0.902,  Adjusted R-squared:  0.9018 
## F-statistic:  5830 on 3 and 1901 DF,  p-value: < 2.2e-16

M2y binned

# Centred-predictor year-interaction model on the binned data
M2y<-lm(log(V)~H_CA_log_centred * factor(year),data=df_bin_y)
summary(M2y)
## 
## Call:
## lm(formula = log(V) ~ H_CA_log_centred * factor(year), data = df_bin_y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23417 -0.15098 -0.02025  0.10949  0.35513 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -1.32647    0.07008 -18.928 2.65e-10 ***
## H_CA_log_centred                   0.80223    0.02196  36.532 1.13e-13 ***
## factor(year)2019                  -0.28654    0.09944  -2.881   0.0138 *  
## H_CA_log_centred:factor(year)2019 -0.04818    0.03073  -1.568   0.1429    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1982 on 12 degrees of freedom
## Multiple R-squared:  0.9953, Adjusted R-squared:  0.9942 
## F-statistic: 854.9 on 3 and 12 DF,  p-value: 2.986e-14

M3y thinned

# Centred-predictor year-interaction model on the thinned data
M3y<-lm(log(V)~H_CA_log_centred * factor(year),data=df_thin)
summary(M3y)
## 
## Call:
## lm(formula = log(V) ~ H_CA_log_centred * factor(year), data = df_thin)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.43874 -0.33668  0.00062  0.35900  2.32077 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -1.327052   0.034161 -38.847  < 2e-16 ***
## H_CA_log_centred                   0.762556   0.009722  78.437  < 2e-16 ***
## factor(year)2019                  -0.379067   0.053927  -7.029 6.72e-12 ***
## H_CA_log_centred:factor(year)2019 -0.038270   0.016900  -2.265    0.024 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5941 on 508 degrees of freedom
## Multiple R-squared:  0.946,  Adjusted R-squared:  0.9457 
## F-statistic:  2967 on 3 and 508 DF,  p-value: < 2.2e-16

H + CA

M1a raw

# Height and crown area as separate additive predictors, raw data
M1a <- lm(log(V)~log(H)+log(CA),data=df)
summary(M1a)
## 
## Call:
## lm(formula = log(V) ~ log(H) + log(CA), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.24551 -0.47381  0.02379  0.46565  2.14827 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.85558    0.05541  -51.54   <2e-16 ***
## log(H)       0.66939    0.03301   20.28   <2e-16 ***
## log(CA)      0.73622    0.01197   61.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6267 on 1902 degrees of freedom
## Multiple R-squared:  0.8837, Adjusted R-squared:  0.8836 
## F-statistic:  7228 on 2 and 1902 DF,  p-value: < 2.2e-16

M2a binned

# H + CA additive model on the binned data
M2a <- lm(log(V)~log(H)+log(CA),data=df_bin_y)
summary(M2a)
## 
## Call:
## lm(formula = log(V) ~ log(H) + log(CA), data = df_bin_y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36830 -0.23349 -0.01498  0.21028  0.50700 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.8953     0.9275  -3.122 0.008102 ** 
## log(H)        0.5693     0.5660   1.006 0.332853    
## log(CA)       0.8812     0.1802   4.889 0.000296 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2954 on 13 degrees of freedom
## Multiple R-squared:  0.9888, Adjusted R-squared:  0.9871 
## F-statistic: 573.6 on 2 and 13 DF,  p-value: 2.094e-13

M3a thinned

# H + CA additive model on the thinned data
M3a <- lm(log(V)~log(H)+log(CA),data=df_thin)
summary(M3a)
## 
## Call:
## lm(formula = log(V) ~ log(H) + log(CA), data = df_thin)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.21624 -0.39669  0.05547  0.38683  2.16591 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.17619    0.11855  -26.79   <2e-16 ***
## log(H)       0.89842    0.06836   13.14   <2e-16 ***
## log(CA)      0.69976    0.02120   33.00   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6234 on 509 degrees of freedom
## Multiple R-squared:  0.9405, Adjusted R-squared:  0.9402 
## F-statistic:  4019 on 2 and 509 DF,  p-value: < 2.2e-16

H * CA

M1b raw

# Height and crown area plus their interaction, raw data
M1b <- lm(log(V)~log(H)*log(CA),data=df)
summary(M1b)
## 
## Call:
## lm(formula = log(V) ~ log(H) * log(CA), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.22352 -0.47030  0.02357  0.46002  2.18969 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -2.84068    0.05556 -51.130  < 2e-16 ***
## log(H)          0.64297    0.03426  18.769  < 2e-16 ***
## log(CA)         0.68754    0.02098  32.776  < 2e-16 ***
## log(H):log(CA)  0.03069    0.01087   2.822  0.00482 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6256 on 1901 degrees of freedom
## Multiple R-squared:  0.8842, Adjusted R-squared:  0.884 
## F-statistic:  4839 on 3 and 1901 DF,  p-value: < 2.2e-16

M2b binned

# H * CA interaction model on the binned data
M2b <- lm(log(V)~log(H)*log(CA),data=df_bin_y)
summary(M2b)
## 
## Call:
## lm(formula = log(V) ~ log(H) * log(CA), data = df_bin_y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32815 -0.22466 -0.06013  0.21588  0.56332 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -2.63800    1.04419  -2.526 0.026596 *  
## log(H)          0.37765    0.66330   0.569 0.579623    
## log(CA)         0.87134    0.18560   4.695 0.000519 ***
## log(H):log(CA)  0.03444    0.05763   0.598 0.561246    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.303 on 12 degrees of freedom
## Multiple R-squared:  0.9891, Adjusted R-squared:  0.9864 
## F-statistic: 363.6 on 3 and 12 DF,  p-value: 4.843e-12

M3b thinned

# H * CA interaction model on the thinned data
M3b <- lm(log(V)~log(H)*log(CA),data=df_thin)
summary(M3b)
## 
## Call:
## lm(formula = log(V) ~ log(H) * log(CA), data = df_thin)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1873 -0.3989  0.0380  0.3997  2.1997 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -3.15305    0.11942 -26.403   <2e-16 ***
## log(H)          0.85939    0.07312  11.754   <2e-16 ***
## log(CA)         0.66516    0.03141  21.178   <2e-16 ***
## log(H):log(CA)  0.02400    0.01609   1.492    0.136    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6226 on 508 degrees of freedom
## Multiple R-squared:  0.9407, Adjusted R-squared:  0.9404 
## F-statistic:  2687 on 3 and 508 DF,  p-value: < 2.2e-16

Standardised residual Error plots

M1 raw data model

M1 H_CA

# performance::check_model() draws a panel of residual/diagnostic plots for
# each fitted model below (raw M1*, binned M2*, thinned M3* variants).
check_model(M1)

M1 H_CA + year

check_model(M1_y)

M1 H_CA * year

check_model(M1y)

M1 H + CA

check_model(M1a)

M1 H * CA

check_model(M1b)

M2 binned data model

M2 H_CA

check_model(M2)

M2_y H_CA + year

check_model(M2_y)

M2y H_CA * year

check_model(M2y)

M2 H + CA

check_model(M2a)

M2 H * CA

check_model(M2b)

M3 thinned data model

M3 H_CA

check_model(M3)

M3_y H_CA + year

check_model(M3_y)

M3y H_CA * year

check_model(M3y)

M3 H + CA

check_model(M3a)

M3 H * CA

check_model(M3b)

Heteroscedasticity test

Studentized Breusch-Pagan test for heteroscedasticity.

p < 0.05 indicates statistically significant heteroscedasticity, i.e. the model violates the constant-residual-variance assumption.

M1 raw data

# Breusch-Pagan test for each raw-data model.
# Each test is run ONCE and its components extracted by name
# (statistic / parameter / p.value), instead of re-calling bptest()
# three times per model and indexing positionally with [[1]]/[[2]]/[[4]].
bpM1   <- bptest(M1)
bpM1_y <- bptest(M1_y)
bpM1y  <- bptest(M1y)
bpM1a  <- bptest(M1a)
bpM1b  <- bptest(M1b)

# BP test statistics, degrees of freedom and p-values, one per model
stat1 <- c(bpM1$statistic[[1]], bpM1_y$statistic[[1]], bpM1y$statistic[[1]],
           bpM1a$statistic[[1]], bpM1b$statistic[[1]])
df1 <- c(bpM1$parameter[[1]], bpM1_y$parameter[[1]], bpM1y$parameter[[1]],
         bpM1a$parameter[[1]], bpM1b$parameter[[1]])
p1 <- c(bpM1$p.value[[1]], bpM1_y$p.value[[1]], bpM1y$p.value[[1]],
        bpM1a$p.value[[1]], bpM1b$p.value[[1]])

# Assemble and display the summary table
rown <- c("M_CA", "M_CA + year", "M_CA * year", "M + CA", "M * CA")
bp1 <- data.frame(rown, stat1, df1, p1)
coln <- c("model","BP", "df", "p-value")
colnames(bp1) <- coln
kable(bp1)
model BP df p-value
M_CA 12.73989 1 0.0003579
M_CA + year 53.13540 2 0.0000000
M_CA * year 68.69836 3 0.0000000
M + CA 18.16733 2 0.0001135
M * CA 36.53715 3 0.0000001

M2 binned data

# Breusch-Pagan test for each binned-data model.
# Each test is run ONCE; components extracted by name instead of the
# original triple bptest() calls with positional [[1]]/[[2]]/[[4]] indices.
bpM2   <- bptest(M2)
bpM2_y <- bptest(M2_y)
bpM2y  <- bptest(M2y)
bpM2a  <- bptest(M2a)
bpM2b  <- bptest(M2b)

# BP test statistics, degrees of freedom and p-values, one per model
stat2 <- c(bpM2$statistic[[1]], bpM2_y$statistic[[1]], bpM2y$statistic[[1]],
           bpM2a$statistic[[1]], bpM2b$statistic[[1]])
df2 <- c(bpM2$parameter[[1]], bpM2_y$parameter[[1]], bpM2y$parameter[[1]],
         bpM2a$parameter[[1]], bpM2b$parameter[[1]])
p2 <- c(bpM2$p.value[[1]], bpM2_y$p.value[[1]], bpM2y$p.value[[1]],
        bpM2a$p.value[[1]], bpM2b$p.value[[1]])

# Assemble and display the summary table
rown <- c("M_CA", "M_CA + year", "M_CA * year", "M + CA", "M * CA")
bp2 <- data.frame(rown, stat2, df2, p2)
coln <- c("model","BP", "df", "p-value")
colnames(bp2) <- coln
kable(bp2)
model BP df p-value
M_CA 0.8231927 1 0.3642484
M_CA + year 2.4746043 2 0.2901660
M_CA * year 4.2440677 3 0.2362867
M + CA 1.0411573 2 0.5941766
M * CA 4.6880527 3 0.1961173

M3 thinned data

# Breusch-Pagan test for each thinned-data model.
# Each test is run ONCE; components extracted by name instead of the
# original triple bptest() calls with positional [[1]]/[[2]]/[[4]] indices.
bpM3   <- bptest(M3)
bpM3_y <- bptest(M3_y)
bpM3y  <- bptest(M3y)
bpM3a  <- bptest(M3a)
bpM3b  <- bptest(M3b)

# BP test statistics, degrees of freedom and p-values, one per model
stat3 <- c(bpM3$statistic[[1]], bpM3_y$statistic[[1]], bpM3y$statistic[[1]],
           bpM3a$statistic[[1]], bpM3b$statistic[[1]])
df3 <- c(bpM3$parameter[[1]], bpM3_y$parameter[[1]], bpM3y$parameter[[1]],
         bpM3a$parameter[[1]], bpM3b$parameter[[1]])
p3 <- c(bpM3$p.value[[1]], bpM3_y$p.value[[1]], bpM3y$p.value[[1]],
        bpM3a$p.value[[1]], bpM3b$p.value[[1]])

# Assemble and display the summary table
rown <- c("M_CA", "M_CA + year", "M_CA * year", "M + CA", "M * CA")
bp3 <- data.frame(rown, stat3, df3, p3)
coln <- c("model","BP", "df", "p-value")
colnames(bp3) <- coln
kable(bp3)
model BP df p-value
M_CA 13.30107 1 0.0002653
M_CA + year 20.94477 2 0.0000283
M_CA * year 22.13246 3 0.0000612
M + CA 8.60780 2 0.0135157
M * CA 22.30937 3 0.0000562

AIC

Models:
M = H_CA
M_y = H_CA + year
My = H_CA * year
Ma = H + CA
Mb = H * CA

M1 raw data

# Compare the five raw-data models by small-sample-corrected AIC (lower = better)
AICctab(M1, M1_y, M1y, M1a, M1b, base = TRUE)
##      AICc   dAICc  df
## M1y  3307.7    0.0 5 
## M1_y 3360.5   52.7 4 
## M1b  3625.0  317.2 5 
## M1a  3631.0  323.2 4 
## M1   3631.4  323.6 3

M2 data binning

# Compare the five binned-data models by small-sample-corrected AIC
AICctab(M2, M2_y, M2y, M2a, M2b, base = TRUE)
##      AICc dAICc df
## M2y   5.0  0.0  5 
## M2_y  7.4  2.4  4 
## M2   11.4  6.4  3 
## M2a  14.7  9.7  4 
## M2b  18.6 13.6  5

M3 data thinning

# Compare the five thinned-data models by small-sample-corrected AIC
AICctab(M3, M3_y, M3y, M3a, M3b, base = TRUE)
##      AICc  dAICc df
## M3y  925.9   0.0 5 
## M3_y 929.0   3.1 4 
## M3b  973.9  48.0 5 
## M3a  974.1  48.2 4 
## M3   977.2  51.3 3

Plot fit vs observed

M1 = raw data
M2 = binned data
M3 = thinned data

M = H_CA
M_y = H_CA + year
My = H_CA * year
Ma = H + CA
Mb = H * CA

Fit models

M1

# Fitted log(V) values with standard errors for M1 on the raw data,
# plus a normal-approximation 95% confidence band around the fit.
pred1 <- predict(M1, newdata = df, se.fit = TRUE)
fit1 <- pred1$fit
se1 <- pred1$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata1 <- data.frame(df, fit = fit1, se = se1)
preddata1$upper <- preddata1$fit + critval * preddata1$se
preddata1$lower <- preddata1$fit - critval * preddata1$se

M2

# Fitted log(V) values with standard errors for M2 on the binned data,
# plus a normal-approximation 95% confidence band around the fit.
pred2 <- predict(M2, newdata = df_bin_y, se.fit = TRUE)
fit2 <- pred2$fit
se2 <- pred2$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata2 <- data.frame(df_bin_y, fit = fit2, se = se2)
preddata2$upper <- preddata2$fit + critval * preddata2$se
preddata2$lower <- preddata2$fit - critval * preddata2$se

M3

# Fitted log(V) values with standard errors for M3 on the thinned data,
# plus a normal-approximation 95% confidence band around the fit.
pred3 <- predict(M3, newdata = df_thin, se.fit = TRUE)
fit3 <- pred3$fit
se3 <- pred3$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata3 <- data.frame(df_thin, fit = fit3, se = se3)
preddata3$upper <- preddata3$fit + critval * preddata3$se
preddata3$lower <- preddata3$fit - critval * preddata3$se

M1_y

# Fitted log(V) values with standard errors for M1_y (H_CA + year, raw data),
# plus a normal-approximation 95% confidence band around the fit.
pred1_y <- predict(M1_y, newdata = df, se.fit = TRUE)
fit1_y <- pred1_y$fit
se1_y <- pred1_y$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata1_y <- data.frame(df, fit = fit1_y, se = se1_y)
preddata1_y$upper <- preddata1_y$fit + critval * preddata1_y$se
preddata1_y$lower <- preddata1_y$fit - critval * preddata1_y$se

M2_y

# Fitted log(V) values with standard errors for M2_y (binned data),
# plus a normal-approximation 95% confidence band around the fit.
pred2_y <- predict(M2_y, newdata = df_bin_y, se.fit = TRUE)
fit2_y <- pred2_y$fit
se2_y <- pred2_y$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata2_y <- data.frame(df_bin_y, fit = fit2_y, se = se2_y)
preddata2_y$upper <- preddata2_y$fit + critval * preddata2_y$se
preddata2_y$lower <- preddata2_y$fit - critval * preddata2_y$se

M3_y

# Fitted log(V) values with standard errors for M3_y (thinned data),
# plus a normal-approximation 95% confidence band around the fit.
pred3_y <- predict(M3_y, newdata = df_thin, se.fit = TRUE)
fit3_y <- pred3_y$fit
se3_y <- pred3_y$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata3_y <- data.frame(df_thin, fit = fit3_y, se = se3_y)
preddata3_y$upper <- preddata3_y$fit + critval * preddata3_y$se
preddata3_y$lower <- preddata3_y$fit - critval * preddata3_y$se

M1y

# Fitted log(V) values with standard errors for the interaction model M1y
# (raw data), plus a normal-approximation 95% confidence band.
pred1y <- predict(M1y, newdata = df, se.fit = TRUE)
fit1y <- pred1y$fit
se1y <- pred1y$se.fit

# BUG FIX: the original used fit1_y (predictions from the additive model
# M1_y) here, so preddata1y$fit/upper/lower were taken from the wrong model.
preddata1y <- data.frame(df, fit = fit1y, se = se1y)

critval <- 1.96 ## approx 95% CI
preddata1y$upper <- preddata1y$fit + (critval * preddata1y$se)
preddata1y$lower <- preddata1y$fit - (critval * preddata1y$se)

M2y

# Fitted log(V) values with standard errors for M2y (binned data),
# plus a normal-approximation 95% confidence band around the fit.
pred2y <- predict(M2y, newdata = df_bin_y, se.fit = TRUE)
fit2y <- pred2y$fit
se2y <- pred2y$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata2y <- data.frame(df_bin_y, fit = fit2y, se = se2y)
preddata2y$upper <- preddata2y$fit + critval * preddata2y$se
preddata2y$lower <- preddata2y$fit - critval * preddata2y$se

M3y

# Fitted log(V) values with standard errors for M3y (thinned data),
# plus a normal-approximation 95% confidence band around the fit.
pred3y <- predict(M3y, newdata = df_thin, se.fit = TRUE)
fit3y <- pred3y$fit
se3y <- pred3y$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata3y <- data.frame(df_thin, fit = fit3y, se = se3y)
preddata3y$upper <- preddata3y$fit + critval * preddata3y$se
preddata3y$lower <- preddata3y$fit - critval * preddata3y$se

M1a

# Fitted log(V) values with standard errors for M1a (H + CA, raw data),
# plus a normal-approximation 95% confidence band around the fit.
pred1a <- predict(M1a, newdata = df, se.fit = TRUE)
fit1a <- pred1a$fit
se1a <- pred1a$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata1a <- data.frame(df, fit = fit1a, se = se1a)
preddata1a$upper <- preddata1a$fit + critval * preddata1a$se
preddata1a$lower <- preddata1a$fit - critval * preddata1a$se

M2a

# Fitted log(V) values with standard errors for M2a (binned data),
# plus a normal-approximation 95% confidence band around the fit.
pred2a <- predict(M2a, newdata = df_bin_y, se.fit = TRUE)
fit2a <- pred2a$fit
se2a <- pred2a$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata2a <- data.frame(df_bin_y, fit = fit2a, se = se2a)
preddata2a$upper <- preddata2a$fit + critval * preddata2a$se
preddata2a$lower <- preddata2a$fit - critval * preddata2a$se

M3a

# Fitted log(V) values with standard errors for M3a (thinned data),
# plus a normal-approximation 95% confidence band around the fit.
pred3a <- predict(M3a, newdata = df_thin, se.fit = TRUE)
fit3a <- pred3a$fit
se3a <- pred3a$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata3a <- data.frame(df_thin, fit = fit3a, se = se3a)
preddata3a$upper <- preddata3a$fit + critval * preddata3a$se
preddata3a$lower <- preddata3a$fit - critval * preddata3a$se

M1b

# Fitted log(V) values with standard errors for M1b (H * CA, raw data),
# plus a normal-approximation 95% confidence band around the fit.
pred1b <- predict(M1b, newdata = df, se.fit = TRUE)
fit1b <- pred1b$fit
se1b <- pred1b$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata1b <- data.frame(df, fit = fit1b, se = se1b)
preddata1b$upper <- preddata1b$fit + critval * preddata1b$se
preddata1b$lower <- preddata1b$fit - critval * preddata1b$se

M2b

# Fitted log(V) values with standard errors for M2b (binned data),
# plus a normal-approximation 95% confidence band around the fit.
pred2b <- predict(M2b, newdata = df_bin_y, se.fit = TRUE)
fit2b <- pred2b$fit
se2b <- pred2b$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata2b <- data.frame(df_bin_y, fit = fit2b, se = se2b)
preddata2b$upper <- preddata2b$fit + critval * preddata2b$se
preddata2b$lower <- preddata2b$fit - critval * preddata2b$se

M3b

# Fitted log(V) values with standard errors for M3b (thinned data),
# plus a normal-approximation 95% confidence band around the fit.
pred3b <- predict(M3b, newdata = df_thin, se.fit = TRUE)
fit3b <- pred3b$fit
se3b <- pred3b$se.fit

critval <- 1.96  # z multiplier for an approximate 95% CI
preddata3b <- data.frame(df_thin, fit = fit3b, se = se3b)
preddata3b$upper <- preddata3b$fit + critval * preddata3b$se
preddata3b$lower <- preddata3b$fit - critval * preddata3b$se

Plot models

Plot predictions H_CA

M1 raw

# Predicted vs observed log(V) for M1; the 1:1 line marks perfect agreement.
# Note: y maps to the standalone vector fit1, same length as preddata1.
ggplot(preddata1, aes(x = log(V), y = fit1)) + geom_point(alpha=0.2) +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

M2 binned

# Predicted vs observed log(V) for M2 (binned data); 1:1 line = perfect fit
ggplot(preddata2, aes(x = log(V), y = fit2)) + geom_point() +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

M3 thinned

# Predicted vs observed log(V) for M3 (thinned data); 1:1 line = perfect fit
ggplot(preddata3, aes(x = log(V), y = fit3)) + geom_point() +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Plot predictions H_CA + year

M1_y raw

# Predicted vs observed log(V) for M1_y (raw data); 1:1 line = perfect fit
ggplot(preddata1_y, aes(x = log(V), y = fit1_y)) + geom_point(alpha=0.2) +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

M2_y binned

# Predicted vs observed log(V) for M2_y (binned data); 1:1 line = perfect fit
ggplot(preddata2_y, aes(x = log(V), y = fit2_y)) + geom_point() +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

M3_y thinned

# Predicted vs observed log(V) for M3_y (thinned data); 1:1 line = perfect fit
ggplot(preddata3_y, aes(x = log(V), y = fit3_y)) + geom_point() +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Plot predictions H_CA * year

M1y raw

# Predicted vs observed log(V) for M1y (raw data); 1:1 line = perfect fit.
# NOTE(review): y maps to the vector fit1y directly, which is correct even
# though preddata1y$fit was built from fit1_y upstream — verify that column.
ggplot(preddata1y, aes(x = log(V), y = fit1y)) + geom_point(alpha=0.2) +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

M2y binned

# Predicted vs observed log(V) for M2y (binned data); 1:1 line = perfect fit
ggplot(preddata2y, aes(x = log(V), y = fit2y)) + geom_point() +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

M3y thinned

# Predicted vs observed log(V) for M3y (thinned data); 1:1 line = perfect fit
ggplot(preddata3y, aes(x = log(V), y = fit3y)) + geom_point() +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Plot predictions H + CA

M1a raw

# Predicted vs observed log(V) for M1a (raw data); 1:1 line = perfect fit
ggplot(preddata1a, aes(x = log(V), y = fit1a)) + geom_point(alpha=0.2) +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

M2a binned

# Predicted vs observed log(V) for M2a (binned data); 1:1 line = perfect fit
ggplot(preddata2a, aes(x = log(V), y = fit2a)) + geom_point() +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

M3a thinned

# Predicted vs observed log(V) for M3a (thinned data); 1:1 line = perfect fit
ggplot(preddata3a, aes(x = log(V), y = fit3a)) + geom_point() +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Plot predictions H * CA

M1b raw

# Predicted vs observed log(V) for M1b (raw data); 1:1 line = perfect fit
ggplot(preddata1b, aes(x = log(V), y = fit1b)) + geom_point(alpha=0.2) +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

M2b binned

# Predicted vs observed log(V) for M2b (binned data); 1:1 line = perfect fit
ggplot(preddata2b, aes(x = log(V), y = fit2b)) + geom_point() +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

M3b thinned

# Predicted vs observed log(V) for M3b (thinned data); 1:1 line = perfect fit
ggplot(preddata3b, aes(x = log(V), y = fit3b)) + geom_point() +
  geom_smooth() +
  theme_bw()+
  theme(aspect.ratio = 1) +
  geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'


  1. Jucker, Tommaso, et al. “Allometric equations for integrating remote sensing imagery into forest monitoring programmes.” Global change biology 23.1 (2017): 177-190. https://doi.org/10.1111/gcb.13388↩︎